Analysis of Fertility by Mother’s Hispanic Origin

Load the libraries

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.5.0 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(readr)

Get Data

# Read the tab-delimited natality export into a tibble, trimming
# surrounding whitespace from every field.
Natality_MOH <- read_delim(
  file = "Natality,MOH.txt",
  delim = "\t",
  escape_double = FALSE,
  trim_ws = TRUE
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 5362 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (11): Notes, Mother's Hispanic Origin, Mother's Hispanic Origin Code, Mo...
## dbl  (3): Year, Year Code, Births
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Look at the Data

# Preview the column names, types and leading values of the raw import.
Natality_MOH %>% glimpse()
## Rows: 5,362
## Columns: 14
## $ Notes                               <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ `Mother's Hispanic Origin`          <chr> "Hispanic or Latino", "Hispanic or…
## $ `Mother's Hispanic Origin Code`     <chr> "2135-2", "2135-2", "2135-2", "213…
## $ `Mother's Single Race 6`            <chr> "American Indian or Alaska Native"…
## $ `Mother's Single Race 6 Code`       <chr> "1002-5", "1002-5", "1002-5", "100…
## $ `Census Division of Residence`      <chr> "Division 1: New England", "Divisi…
## $ `Census Division of Residence Code` <chr> "CENS-D1", "CENS-D1", "CENS-D1", "…
## $ Year                                <dbl> 2016, 2016, 2016, 2016, 2016, 2017…
## $ `Year Code`                         <dbl> 2016, 2016, 2016, 2016, 2016, 2017…
## $ `Age of Mother 9`                   <chr> "15-19 years", "20-24 years", "25-…
## $ `Age of Mother 9 Code`              <chr> "15-19", "20-24", "25-29", "30-34"…
## $ Births                              <dbl> 15, 39, 49, 35, 19, 17, 51, 54, 42…
## $ `Female Population`                 <chr> "1631", "1730", "1681", "1564", "1…
## $ `Fertility Rate`                    <chr> "9.20", "22.54", "29.15", "22.38",…

Rename and Select

# Keep only the analysis columns and give them short names in one step:
# select() accepts `new = old` pairs and returns the columns in the
# order listed here.
data <- Natality_MOH %>%
  select(
    Origin = `Mother's Hispanic Origin`,
    Race = `Mother's Single Race 6`,
    Birth = Births,
    Pop = `Female Population`,
    Rate = `Fertility Rate`,
    Age = `Age of Mother 9 Code`,
    Year,
    Region = `Census Division of Residence Code`
  )

# Check the reduced, renamed table.
data %>% glimpse()
## Rows: 5,362
## Columns: 8
## $ Origin <chr> "Hispanic or Latino", "Hispanic or Latino", "Hispanic or Latino…
## $ Race   <chr> "American Indian or Alaska Native", "American Indian or Alaska …
## $ Birth  <dbl> 15, 39, 49, 35, 19, 17, 51, 54, 42, 34, 23, 56, 65, 54, 47, 21,…
## $ Pop    <chr> "1631", "1730", "1681", "1564", "1595", "1626", "1725", "1716",…
## $ Rate   <chr> "9.20", "22.54", "29.15", "22.38", "11.91", "10.46", "29.57", "…
## $ Age    <chr> "15-19", "20-24", "25-29", "30-34", "35-39", "15-19", "20-24", …
## $ Year   <dbl> 2016, 2016, 2016, 2016, 2016, 2017, 2017, 2017, 2017, 2017, 201…
## $ Region <chr> "CENS-D1", "CENS-D1", "CENS-D1", "CENS-D1", "CENS-D1", "CENS-D1…

Recode

# Shorten the race labels, convert Pop and Rate from text to numbers, and
# drop the rows that cannot be used in the analysis.
data_recode <- data %>%
  mutate(
    Race = case_when(
      Race == "American Indian or Alaska Native" ~ "AmInd",
      # NOTE(review): confirm that "Asian or Pacific Islander" is an actual
      # level of the source race column; if it is not, this branch never
      # matches and those rows keep their original label.
      Race == "Asian or Pacific Islander" ~ "API",
      Race == "Black or African American" ~ "Black",
      TRUE ~ Race
    ),
    # Text that is not a number becomes NA here (hence the coercion
    # warnings); those rows are removed by drop_na() below.
    Pop = as.numeric(Pop),
    # Divided by 1000 -- presumably the source rate is expressed per 1,000
    # women; TODO confirm against the data dictionary.
    Rate = as.numeric(Rate) / 1000
  ) %>%
  filter(Race != "Not Reported") %>%
  drop_na()
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
data_recode %>% glimpse()
## Rows: 3,721
## Columns: 8
## $ Origin <chr> "Hispanic or Latino", "Hispanic or Latino", "Hispanic or Latino…
## $ Race   <chr> "AmInd", "AmInd", "AmInd", "AmInd", "AmInd", "AmInd", "AmInd", …
## $ Birth  <dbl> 15, 39, 49, 35, 19, 17, 51, 54, 42, 34, 23, 56, 65, 54, 47, 21,…
## $ Pop    <dbl> 1631, 1730, 1681, 1564, 1595, 1626, 1725, 1716, 1587, 1653, 172…
## $ Rate   <dbl> 0.00920, 0.02254, 0.02915, 0.02238, 0.01191, 0.01046, 0.02957, …
## $ Age    <chr> "15-19", "20-24", "25-29", "30-34", "35-39", "15-19", "20-24", …
## $ Year   <dbl> 2016, 2016, 2016, 2016, 2016, 2017, 2017, 2017, 2017, 2017, 201…
## $ Region <chr> "CENS-D1", "CENS-D1", "CENS-D1", "CENS-D1", "CENS-D1", "CENS-D1…

Grid Plot

# Scatter of Rate against Origin for the "15-19" age group, with one panel
# per Race (rows) and Region (columns).
data_recode %>%
  filter(Age == "15-19") %>%
  ggplot(mapping = aes(Origin, Rate)) +
  geom_point(alpha = 0.4) +
  facet_grid(rows = vars(Race), cols = vars(Region)) +
  labs(title = "Plot of Origin for 15-19 age group by Race and Region") +
  # Turn the x-axis labels vertical so the Origin names do not overlap.
  theme(axis.text.x = element_text(angle = 90))

Flip the Grid

# Same scatter as the previous grid, with the facets transposed:
# Region on the rows and Race on the columns.
data_recode %>%
  filter(Age == "15-19") %>%
  ggplot(mapping = aes(Origin, Rate)) +
  geom_point(alpha = 0.4) +
  facet_grid(rows = vars(Region), cols = vars(Race)) +
  labs(
    title = "Flip Grid Plot of Origin for 15-19 age group by Race and Region"
  ) +
  # Turn the x-axis labels vertical so the Origin names do not overlap.
  theme(axis.text.x = element_text(angle = 90))

National TFR by Origin

# National total fertility rate (TFR) by Year and Origin.
#
# Steps:
#   1. Pool births and female population over every Race and Region within
#      each Year x Origin x Age cell.
#   2. Recompute the age-specific rate from the pooled totals (Birth / Pop)
#      rather than averaging the per-row rates.
#   3. Sum the age-specific rates within Year x Origin and multiply by 5,
#      the width in years of the age bands visible in the data (e.g.
#      "15-19", "20-24").
#
# The grouping carried from one step to the next is stated explicitly with
# `.groups` so the pipeline does not depend on summarise() silently
# dropping one grouping level per call, and no regrouping messages are
# emitted.
plot <- data_recode %>%
  group_by(Year, Origin, Age) %>%
  summarise(
    Birth = sum(Birth),
    Pop = sum(Pop),
    # Drop only Age: the next steps must still run per Year x Origin.
    .groups = "drop_last"
  ) %>%
  mutate(Rate = Birth / Pop) %>%
  # One TFR value per Year x Origin; return an ungrouped table for plotting.
  summarise(TFR = sum(Rate) * 5, .groups = "drop") %>%
  ggplot(aes(x = Year, y = TFR, color = Origin)) +
  geom_point()

# Render the static ggplot as an interactive plotly widget.
ggplotly(plot)

Plot with Origin and Region

# Total fertility rate (TFR) by Year, Region and Origin, drawn as a grid of
# panels with Origin on the rows and Region on the columns.
#
# Steps:
#   1. Pool births and female population over every Race within each
#      Year x Region x Origin x Age cell.
#   2. Recompute the age-specific rate from the pooled totals (Birth / Pop).
#   3. Sum the age-specific rates within Year x Region x Origin and multiply
#      by 5, the width in years of the age bands visible in the data (e.g.
#      "15-19", "20-24").
#
# The grouping carried from one step to the next is stated explicitly with
# `.groups` so the pipeline does not depend on summarise() silently
# dropping one grouping level per call, and no regrouping messages are
# emitted.
plot1 <- data_recode %>%
  group_by(Year, Region, Origin, Age) %>%
  summarise(
    Birth = sum(Birth),
    Pop = sum(Pop),
    # Drop only Age: the next steps must still run per Year x Region x Origin.
    .groups = "drop_last"
  ) %>%
  mutate(Rate = Birth / Pop) %>%
  # One TFR value per Year x Region x Origin; return an ungrouped table.
  summarise(TFR = sum(Rate) * 5, .groups = "drop") %>%
  ggplot(aes(x = Year, y = TFR, color = Origin)) +
  geom_point() +
  # Turn the x-axis labels vertical so the years fit in the narrow panels.
  theme(axis.text.x = element_text(angle = 90)) +
  facet_grid(Origin ~ Region)

# Render the static ggplot as an interactive plotly widget.
ggplotly(plot1)